Skip to content

Fix the bug in canonicalize-live-in pass#117

Merged
tancheng merged 6 commits intocoredac:mainfrom
ShangkunLi:fix-canonicalize
Aug 21, 2025
Merged

Fix the bug in canonicalize-live-in pass#117
tancheng merged 6 commits intocoredac:mainfrom
ShangkunLi:fix-canonicalize

Conversation

@ShangkunLi
Copy link
Copy Markdown
Collaborator

@ShangkunLi ShangkunLi commented Aug 20, 2025

In the previous --canonicalize-live-in pass, we traversed the blocks in topological order. But this fails for a case like:

module {
  func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref<?x1x1x1x1x128xi8>, %arg1: memref<?x1x128x1x1x128xi8>) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
    %0 = "neura.constant"() <{value = 1 : index}> : () -> index
    %1 = "neura.constant"() <{value = 128 : index}> : () -> index
    %2 = "neura.constant"() <{value = 0 : index}> : () -> index
    %3 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64
    neura.br %3 : i64 to ^bb1
  ^bb1(%4: i64):  // 2 preds: ^bb0, ^bb5
    %5 = "neura.cast"(%4) <{cast_type = "int_to_index"}> : (i64) -> index
    %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (index, index) -> i1
    neura.cond_br %6 : i1 then to ^bb2 else to ^bb6
  ^bb2:  // pred: ^bb1
    %7 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64
    neura.br %7 : i64 to ^bb3
  ^bb3(%8: i64):  // 2 preds: ^bb2, ^bb4
    %9 = "neura.cast"(%8) <{cast_type = "int_to_index"}> : (i64) -> index
    %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1
    neura.cond_br %10 : i1 then to ^bb4 else to ^bb5
  ^bb4:  // pred: ^bb3
    %11 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %9 : index, index, index, index, index, index] memref<?x1x1x1x1x128xi8> : i8
    neura.store_indexed %11 to %arg1[%2, %2, %5, %2, %2, %9 : index, index, index, index, index, index] memref<?x1x128x1x1x128xi8> : i8
    %12 = "neura.add"(%9, %0) : (index, index) -> index
    %13 = "neura.cast"(%12) <{cast_type = "index_to_int"}> : (index) -> i64
    neura.br %13 : i64 to ^bb3
  ^bb5:  // pred: ^bb3
    %14 = "neura.add"(%5, %0) : (index, index) -> index
    %15 = "neura.cast"(%14) <{cast_type = "index_to_int"}> : (index) -> i64
    neura.br %15 : i64 to ^bb1
  ^bb6:  // pred: ^bb1
    "neura.return"() : () -> ()
  }
}

We identify %2 in bb2 as a live-in, add it to the block arguments of bb2, and update the corresponding operands of neura.cond_br in bb1. Now %2 is also used by the neura.cond_br in bb1 and is thus a live-in for bb1. But the current implementation fails to update the block arguments of bb1, because bb1 was already processed earlier in the topological traversal.

Thus the current implementation results in:

  func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref<?x1x1x1x1x128xi8>, %arg1: memref<?x1x128x1x1x128xi8>) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
    %0 = "neura.constant"() <{predicate = true, value = "%arg0"}> : () -> memref<?x1x1x1x1x128xi8>
    %1 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> memref<?x1x128x1x1x128xi8>
    %2 = "neura.constant"() <{value = 1 : i64}> : () -> i64
    %3 = "neura.constant"() <{value = 128 : i64}> : () -> i64
    %4 = "neura.constant"() <{value = 0 : i64}> : () -> i64
    neura.br %4, %3 : i64, i64 to ^bb1
  ^bb1(%5: i64, %6: i64):  // 2 preds: ^bb0, ^bb5
    %7 = "neura.icmp"(%5, %6) <{cmpType = "slt"}> : (i64, i64) -> i1
    neura.cond_br %7 : i1 then %4 : i64 to ^bb2 else to ^bb6
  ^bb2(%8: i64):  // pred: ^bb1
    neura.br %8, %3 : i64, i64 to ^bb3
  ^bb3(%9: i64, %10: i64):  // 2 preds: ^bb2, ^bb4
    %11 = "neura.icmp"(%9, %10) <{cmpType = "slt"}> : (i64, i64) -> i1
    neura.cond_br %11 : i1 then %0, %4, %9, %1, %5, %2, %3 : memref<?x1x1x1x1x128xi8>, i64, i64, memref<?x1x128x1x1x128xi8>, i64, i64, i64 to ^bb4 else %5, %2, %3 : i64, i64, i64 to ^bb5
  ^bb4(%12: memref<?x1x1x1x1x128xi8>, %13: i64, %14: i64, %15: memref<?x1x128x1x1x128xi8>, %16: i64, %17: i64, %18: i64):  // pred: ^bb3
    %19 = neura.load_indexed %12[%13, %13, %13, %13, %13, %14 : i64, i64, i64, i64, i64, i64] memref<?x1x1x1x1x128xi8> : i8
    neura.store_indexed %19 to %15[%13, %13, %16, %13, %13, %14 : i64, i64, i64, i64, i64, i64] memref<?x1x128x1x1x128xi8> : i8
    %20 = "neura.add"(%14, %17) : (i64, i64) -> i64
    neura.br %20, %18 : i64, i64 to ^bb3
  ^bb5(%21: i64, %22: i64, %23: i64):  // pred: ^bb3
    %24 = "neura.add"(%21, %22) : (i64, i64) -> i64
    neura.br %24, %23 : i64, i64 to ^bb1
  ^bb6:  // pred: ^bb1
    "neura.return"() : () -> ()
  }

Therefore, in this PR:

  • Fix the bug where the current --canonicalize-live-in pass cannot handle such a case
  • We are now able to handle arbitrary control flow, but this may introduce nonsensical/redundant phi-grant_predicate-ctrl_mov chains
  • These redundancies can be removed through --fuse-control-flow in the future. Since they are generated by our canonicalization, they have regular dependencies and are easy to remove.

The new canonicalized IR looks like:

  func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref<?x1x1x1x1x128xi8>, %arg1: memref<?x1x128x1x1x128xi8>) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
    %0 = "neura.constant"() <{predicate = true, value = "%arg0"}> : () -> memref<?x1x1x1x1x128xi8>
    %1 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> memref<?x1x128x1x1x128xi8>
    %2 = "neura.constant"() <{value = 1 : i64}> : () -> i64
    %3 = "neura.constant"() <{value = 128 : i64}> : () -> i64
    %4 = "neura.constant"() <{value = 0 : i64}> : () -> i64
    neura.br %4, %3, %4, %0, %1, %2 : i64, i64, i64, memref<?x1x1x1x1x128xi8>, memref<?x1x128x1x1x128xi8>, i64 to ^bb1
  ^bb1(%5: i64, %6: i64, %7: i64, %8: memref<?x1x1x1x1x128xi8>, %9: memref<?x1x128x1x1x128xi8>, %10: i64):  // 2 preds: ^bb0, ^bb5
    %11 = "neura.icmp"(%5, %6) <{cmpType = "slt"}> : (i64, i64) -> i1
    neura.cond_br %11 : i1 then %7, %6, %8, %9, %5, %10 : i64, i64, memref<?x1x1x1x1x128xi8>, memref<?x1x128x1x1x128xi8>, i64, i64 to ^bb2 else to ^bb6
  ^bb2(%12: i64, %13: i64, %14: memref<?x1x1x1x1x128xi8>, %15: memref<?x1x128x1x1x128xi8>, %16: i64, %17: i64):  // pred: ^bb1
    neura.br %12, %13, %14, %12, %15, %16, %17 : i64, i64, memref<?x1x1x1x1x128xi8>, i64, memref<?x1x128x1x1x128xi8>, i64, i64 to ^bb3
  ^bb3(%18: i64, %19: i64, %20: memref<?x1x1x1x1x128xi8>, %21: i64, %22: memref<?x1x128x1x1x128xi8>, %23: i64, %24: i64):  // 2 preds: ^bb2, ^bb4
    %25 = "neura.icmp"(%18, %19) <{cmpType = "slt"}> : (i64, i64) -> i1
    neura.cond_br %25 : i1 then %20, %21, %18, %22, %23, %24, %19 : memref<?x1x1x1x1x128xi8>, i64, i64, memref<?x1x128x1x1x128xi8>, i64, i64, i64 to ^bb4 else %23, %24, %19, %21, %20, %22 : i64, i64, i64, i64, memref<?x1x1x1x1x128xi8>, memref<?x1x128x1x1x128xi8> to ^bb5
  ^bb4(%26: memref<?x1x1x1x1x128xi8>, %27: i64, %28: i64, %29: memref<?x1x128x1x1x128xi8>, %30: i64, %31: i64, %32: i64):  // pred: ^bb3
    %33 = neura.load_indexed %26[%27, %27, %27, %27, %27, %28 : i64, i64, i64, i64, i64, i64] memref<?x1x1x1x1x128xi8> : i8
    neura.store_indexed %33 to %29[%27, %27, %30, %27, %27, %28 : i64, i64, i64, i64, i64, i64] memref<?x1x128x1x1x128xi8> : i8
    %34 = "neura.add"(%28, %31) : (i64, i64) -> i64
    neura.br %34, %32, %26, %27, %29, %30, %31 : i64, i64, memref<?x1x1x1x1x128xi8>, i64, memref<?x1x128x1x1x128xi8>, i64, i64 to ^bb3
  ^bb5(%35: i64, %36: i64, %37: i64, %38: i64, %39: memref<?x1x1x1x1x128xi8>, %40: memref<?x1x128x1x1x128xi8>):  // pred: ^bb3
    %41 = "neura.add"(%35, %36) : (i64, i64) -> i64
    neura.br %41, %37, %38, %39, %40, %36 : i64, i64, i64, memref<?x1x1x1x1x128xi8>, memref<?x1x128x1x1x128xi8>, i64 to ^bb1
  ^bb6:  // pred: ^bb1
    "neura.return"() : () -> ()
  }

@ShangkunLi ShangkunLi marked this pull request as ready for review August 20, 2025 12:48
@tancheng tancheng requested review from HobbitQia and MeowMJ August 20, 2025 13:43
@tancheng tancheng added bug Something isn't working enhancement New feature or request labels Aug 20, 2025
@tancheng tancheng merged commit 87342f8 into coredac:main Aug 21, 2025
1 check passed
@ShangkunLi ShangkunLi linked an issue Aug 25, 2025 that may be closed by this pull request
ShangkunLi pushed a commit that referenced this pull request Mar 12, 2026
Fix the bug in canonicalize-live-in pass
ShangkunLi pushed a commit that referenced this pull request Mar 12, 2026
Fix the bug in canonicalize-live-in pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

bug Something isn't working enhancement New feature or request

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[P1] Transform Ctrl to Data Flow Error

2 participants